home *** CD-ROM | disk | FTP | other *** search
- /* ------------------------------ fftg.c ------------------------------------ */
- /* */
- /* Author: Eyal Lebedinsky */
- /* Date: May 1990 */
- /* Version: 9 June 1991 */
- /* */
- /* Program generator for integer DFFT. This program was developed for a very */
- /* specific need and no attempt was made to make it general. Still, the basic */
- /* algorithm is a standard one from the literature. For maximum performance */
- /* of the integer calculation a special mechanism was employed to track shift */
- /* operations that could be delayed. Coefficients are kept as 16 bits with 1 */
- /* sign bit, 1 integer bit and 14 fractional bits. So a typicall multiply */
- /* will do 16bit*16bits=32bits, then shift down one bit and use the HIGH 16 */
- /* bits as the result. Before adding two numbers they must be scaled down one */
- /* bit to avoid overflow. Adding more numbers calls for more scaling. The */
- /* delayed scaling allows the intermediate array to hold numbers that are */
- /* scaled to a varying degree until a clash occurs. Well, this space is too */
- /* short to elaborate any further and if you did similar kind of work then */
- /* you should know what I am talking about. */
- /* */
- /* usage: fftgc File Size EpName */
- /* File - output file name. */
- /* Size - sample order: 8 for 256 points, 10 for 1024... */
- /* EpName - name of the generated function ('_' added) */
- /* Now assemble the output file and then use as: */
- /* */
- /* #define M 8 [or whatever size] */
- /* #define N (1 << M) */
- /* short x[N]; [input/output array] */
- /* short qf[(N/2)+1]; [power spectrum] */
- /* extern void far fft () */
- /* */
- /* [then call it as: fft();] */
- /* */
- /* This is a modified version of Sorensen et al, IEEE ASSP-35 no 6. */
- /* */
- /* - uses 3+3 complex mult. */
- /* - no pre scrambling. descrambling done when power spectrum calculated. */
- /* - moved to 0-origin as proper for c. */
- /* - trig values in table. */
- /* - the case for sin==cos treated separately. */
- /* - uses delayed scaling down. */
- /* - does length 256 fft with 642 mults. */
- /* */
- /* */
- /* This program is released into the public domain. */
- /* */
- /*----------------------------------------------------------------------------*/
-
- #include <stdio.h>
- #include <math.h>
-
- #define MAXPOINTS 1024
- #define SC15 32768 /* scaling factor 2^15 */
-
- /***
- About SC15:
- Fractions are kept as 16 bit quantities with one sign bit, no integer
- bits and 15 fractional bits. SC15 is used to convert a floating point
- number (in the range -1 .. 1) into such an integer.
-
- There is no integer part because all fractions are really kept already
- scaled down by a factor of 2. This is a result of the fact that all
- fractions are constants (sin() and cos() values) and whenever we
- multiply a point by a fraction we process to scale the result down by
- one bit anywat - so the reduce constants save us some processing time.
- ***/
-
- static short mm[MAXPOINTS+1]; /* digit reverse indices */
-
- /***
- About mm[]:
- The values in the sample points array x[] are not scramled at any
- stage. Instead, the array mm[] indicated where a point should be
- if we DID do the scrambling. All indices into x[] use mm[] and
- this is how the power spectrum extracts the data. This means that
- you CANNOT use the x[] array after calling fft() because the points
- are not in place. If you want to change the output of fft() then just
- modify the last part of this program (the power spectrum logic) to
- calculate whatever you need.
- ***/
-
- static int cntx[9]; /* stats counters */
-
- /***
- About cntx[]:
- This array gathers statistics:
- 0 memory loads
- 1 memory stores
- 2 register shifts
- 3 adds
- 4 mults
- 5 memory shifts due to smad() correction
- ***/
-
- static char *file_name, *ep_name;
-
- extern void start_fft (), end_fft ();
- extern void fft1 (), /* scaling */
- fft2 (), fft3 (), fft4 (), fft5 (), /* SRFFT */
- fft7 (), fft8 (); /* power spectrum */
-
- /***
- about ad[], mad and smad():
- Because we do integer arithmetic, adding two numbers can overflow the
- integer magnitude. For this reason the integers are scaled down (by one
- bit) before the addition. Sometimes we add more that two numbers and it
- is necessary to scale further.
-
- The FFT algorithm does not use all of the points in the same way in one
- pass, which means that some data needs to be scaled and some does not
- need to. Actualy, in the first pass some points are not used at all.
- This means that SOME of the points would have been scaled down. If we
- add (in a later stage) such a point with an un-scaled point the result
- is invalid. For this purpose the array ad[] keeps track of how far each
- point was scaled, smad() checks that a point is properly scaled and mad
- is the needed scaling at this stage. As the FFT progresses, some points
- are kept at a different scaling untill they are used in an expression
- with other points. If smad() finds a point out of adjustment the it
- issues a scaling instruction using the fft1() function.
-
- The program was adjusted to optimize the scaling (the various
- fft2() ... fft5() have internal scaling rules) and you may notice that
- smad() hardly ever needs to do any correction (usually there is only one
- final shift). This makes the whole ad[] almost redundant but it was left
- in for future use.
-
- If your smaple points are less that 16 bits then you still need to keep
- scaling you data. You need to have enough spare bits to avoid the
- scaling, and the number depends on the size of the sample - the larger
- the sample the more stages the FFT has and the more spare bits you
- need.
- ***/
-
- static short ad[MAXPOINTS+1]; /* how far each point is adjusted */
- static int mad; /* maximum adjust at this level */
-
- static void
- smad (i, j) /* set proper adjust */
- int i, j;
- {
- while (ad[i] < (mad-j)) {
- fprintf (stderr, "x[%u] >>= %d (%d-%d)\n", i, 1, mad, j);
- fft1 (mm[i]);
- ++ad[i];
- cntx[0] += 1;
- cntx[1] += 1;
- cntx[2] += 1;
- cntx[5] += 1;
- }
- }
-
- main (argc, argv)
- int argc;
- char *argv[];
- {
- int n, m, j, i, k, is, id, n1, n2, n4, n8, ind,
- i0, i1, i2, i3, i4, i5, i6, i7, i8,
- cc1, ss1, cc3, ss3;
- float e, a, a3;
-
- /* ---------- Handle arguments ------------------------------------------- */
-
- if (argc < 4) {
- printf ("usage: fftg File PowerOf2 EpName [stderrFile]\n");
- exit (0);
- }
-
- file_name = argv[1];
- sscanf (argv[2], "%u", &m);
- ep_name = argv[3];
- if (argc > 4)
- freopen (argv[4], "wt", stderr);
-
- /* ---------- Initialise ------------------------------------------------ */
-
- start_fft (file_name, ep_name);
-
- n = 1 << m;
-
- for (i = 0; i < 9; ++i)
- cntx[i] = 0;
-
- mad = 0;
- for (i = 1; i <= n; ++i)
- ad[i] = 0;
-
- /* ---------- Digit reverse counter -------------------------------------- */
-
- for (i = 1; i <= n; ++i) {
- mm[i] = i-1; /* '-1' for shifting to 0-origin */
- }
-
- j = 1;
- n1 = n - 1;
- for (i = 1; i <= n1; ++i) {
- if (i < j) {
- mm[i] = j-1; /* '-1' for shifting to 0-origin */
- mm[j] = i-1; /* '-1' for shifting to 0-origin */
- }
-
- for (k = n / 2; k < j; k /= 2)
- j -= k;
- j += k;
- }
-
- /* ---------- Length two butterflies ------------------------------------- */
-
- for (is = 1, id = 4; is < n; is = 2 * id - 1, id = 4 * id) {
- for (i0 = is; i0 <= n; i0 += id) {
- i1 = i0 + 1;
-
- smad (i0, 0);
- smad (i1, 0);
- fft2 (mm[i0], mm[i1]);
- ++ad[i0];
- ++ad[i1];
- cntx[0] += 2;
- cntx[1] += 2;
- cntx[2] += 2;
- cntx[3] += 2;
- }
- }
-
- /* ---------- L shaped butterflies --------------------------------------- */
-
- n2 = 2;
- for (k = 2; k <= m; ++k) {
- ++mad;
- n2 = n2 * 2;
- n4 = n2 / 4;
- n8 = n2 / 8;
- e = 6.2831853071719586 / n2; /* 2 * pi */
- cc1 = (SC15/2) * 0.7071067811865475; /* sqrt (0.5) */
-
- for (is = 0, id = n2 * 2; is < n; is = 2 * id - n2, id *= 4) {
- for (i = is; i <= n - 1; i += id) {
- i1 = i + 1;
- i2 = i1 + n4;
- i3 = i2 + n4;
- i4 = i3 + n4;
-
- smad (i1, 0);
- smad (i3, 1);
- smad (i4, 1);
- fft3 (mm[i1], mm[i3], mm[i4]);
- ad[i1] += 1;
- ad[i3] += 2;
- ad[i4] += 2;
- cntx[0] += 3;
- cntx[1] += 3;
- cntx[2] += 5;
- cntx[3] += 4;
-
- if (n4 != 1) {
- i1 = i1 + n8;
- i2 = i2 + n8;
- i3 = i3 + n8;
- i4 = i4 + n8;
-
- smad (i1, 1);
- smad (i2, 0);
- smad (i3, 1);
- smad (i4, 1);
- fft4 (mm[i1], mm[i2], mm[i3], mm[i4],
- cc1);
- ad[i1] += 2;
- ad[i2] += 1;
- ad[i3] += 2;
- ad[i4] += 2;
- cntx[0] += 4;
- cntx[1] += 4;
- cntx[2] += 2;
- cntx[3] += 6;
- cntx[4] += 2;
- }
- }
- }
-
- for (j = 2, a = e; j <= n8; ++j, a += e) {
- a3 = 3 * a;
- cc1 = (SC15/2) * cos (a);
- ss1 = (SC15/2) * sin (a);
- cc3 = (SC15/2) * cos (a3);
- ss3 = (SC15/2) * sin (a3);
-
- for (is = 0, id = n2 * 2; is < n;
- is = 2 * id - n2, id *= 4) {
- for (i = is; i <= n - 1; i += id) {
- i1 = i + j;
- i2 = i1 + n4;
- i3 = i2 + n4;
- i4 = i3 + n4;
- i5 = i + n4 - j + 2;
- i6 = i5 + n4;
- i7 = i6 + n4;
- i8 = i7 + n4;
-
- smad (i1, 0);
- smad (i2, 0);
- smad (i3, 2);
- smad (i4, 2);
- smad (i5, 0);
- smad (i6, 0);
- smad (i7, 1);
- smad (i8, 1);
- if (ind = (ad[i3] < (mad-1))) {
- ++ad[i3];
- ++ad[i4];
- cntx[2] += 1;
- }
- fft5 (mm[i1], mm[i2], mm[i3], mm[i4],
- mm[i5], mm[i6], mm[i7], mm[i8],
- ss1-cc1, -ss1-cc1, cc1,
- ss3-cc3, -ss3-cc3, cc3, ind);
- ad[i1] += 1;
- ad[i2] += 1;
- ad[i3] += 2;
- ad[i4] += 2;
- ad[i5] += 1;
- ad[i6] += 1;
- ad[i7] += 2;
- ad[i8] += 2;
- cntx[0] += 8;
- cntx[1] += 8;
- cntx[2] += 4;
- cntx[3] += 18;
- cntx[4] += 6;
- }
- }
- }
- }
-
- /* ---------- force final adjustment ------------------------------------- */
-
- ++mad;
- for (i = 1; i <= n; ++i)
- smad (i, 0);
-
- /* ---------- Power spectrum (no sqrt) ----------------------------------- */
-
- fft7 (0, mm[1]);
-
- for (i = 1; i < n/2; ++i)
- fft8 (i, mm[i+1], mm[n-i+1]);
-
- fft7 ((n/2), mm[n/2+1]);
-
- /* ---------- end and show stats ----------------------------------------- */
-
- end_fft (file_name, ep_name);
-
- for (i = 0; i < 9; ++i)
- fprintf (stderr, " %5u", i);
- fprintf (stderr, "\n Load Store Shift Add Mult Adj\n");
- for (i = 0; i < 9; ++i)
- fprintf (stderr, " %5u", cntx[i]);
- fprintf (stderr, "\n");
-
- exit (0);
- }
-